---
output: 
    pdf_document:
        keep_tex: yes
title: "Appendix B: Belt and Road and UNGA Voting"
geometry: margin=1in
mainfont: cochineal
fontsize: 11pt
linestretch: 1.15
endnote: no
sansitup: no
graphics: yes
toc: yes

header-includes:
- \usepackage{float} #use the 'float' package
- \floatplacement{figure}{H} #make every figure with caption = h
- \usepackage{graphicx}
- \usepackage{longtable}
- \LTcapwidth=.95\textwidth
- \linespread{1.05}
- \usepackage{hyperref}
- \usepackage{booktabs}
- \usepackage{subfig}
- \renewcommand{\figurename}{Figure B.}
- \makeatletter
- \def\fnum@figure{\figurename\thefigure}
- \makeatother

subtitle: 'Codebook'

---

```{r setup, include=FALSE}
# Global chunk defaults: hide code, warnings and messages in the rendered PDF.
# `comment` is the character prefix put in front of text output; NA is the
# documented way to switch the prefix off (a logical FALSE is not a valid
# prefix value).
knitr::opts_chunk$set(
  echo = FALSE,
  warning = FALSE,
  message = FALSE,
  comment = NA
)

# Start from an empty workspace so objects left over from an interactive
# session cannot leak into the build.
rm(list = ls())

# Packages are attached in this order on purpose: later packages mask
# same-named functions of earlier ones.
library(readxl)
library(countrycode)
library(tidyverse)
library(broom)
library(zoo)
library(gsynth)
library(kableExtra)
library(gridExtra)
library(igraph)

```

# Introduction


## Data sources


The data collected for the paper: 


1. BRI data

2. Polity4 Project

3. World Bank data

4. Fariss Human Rights Indicator


## Datasets

We compile three datasets. Because we are interested in changes relative to China, the US, and Russia, we compile one dataset for each of these comparison countries. The following codebook reports the mean and standard deviation of each variable for each comparison country, together with histograms showing the distribution of each variable.

```{r sign-data, }

# Build the dyadic base data: read the BRI sign dates, cross every country
# with every other country, and attach the sign/negotiation/project dates of
# both countries in each pair.

# Reading data (second sheet of the workbook)
sign_data <- read_xlsx("./replication_files/data/data/raw_data/BRI_Dates_byCountry.xlsx", 2)

# Two copies of the country name, one per side of the dyad
sign_data$ctry1 <- sign_data$Country
sign_data$ctry2 <- sign_data$Country

# All ctry1 x ctry2 combinations
raw_country <- sign_data %>% tidyr::expand(ctry1, ctry2)

# Correlates of War numeric identifiers for both sides
raw_country$cown1 <- countrycode(raw_country$ctry1, "country.name", "cown")
raw_country$cown2 <- countrycode(raw_country$ctry2, "country.name", "cown")

# Drop pairs in which either country has no Correlates of War code. These
# pairs cannot be matched to any of the code-keyed sources below, and a
# missing code would otherwise act as a join key that matches other missing
# codes.
raw_country_excl <- raw_country %>%
  filter(!is.na(cown1), !is.na(cown2))

# Selecting only relevant variables
sign_data_sub <- sign_data %>%
  select(
    Country,
    Date_MoU_Signed,
    Date_Negotiations_Initiated,
    Date_FirstBRIProject_Started
  )

# Append the sign/negotiation/project dates, first for country 1 and then for
# country 2. The join starts from the filtered pairs (`raw_country_excl`) so
# the exclusion above actually takes effect.
country_with_sig <- raw_country_excl %>%
  left_join(sign_data_sub, by = c("ctry1" = "Country")) %>%
  rename(
    ctry1_mou = Date_MoU_Signed,
    ctry1_neg = Date_Negotiations_Initiated,
    ctry1_bri_start = Date_FirstBRIProject_Started
  ) %>%
  left_join(sign_data_sub, by = c("ctry2" = "Country")) %>%
  rename(
    ctry2_mou = Date_MoU_Signed,
    ctry2_neg = Date_Negotiations_Initiated,
    ctry2_bri_start = Date_FirstBRIProject_Started
  ) %>%
  # Expanding also creates pairs of a country with itself
  # (e.g. ctry1 = Afghanistan and ctry2 = Afghanistan); remove them.
  filter(ctry1 != ctry2) %>%
  # Running number that identifies each remaining dyad, placed first
  mutate(dyad_id = row_number()) %>%
  relocate(dyad_id)

# The base data is prepared for now. The next steps prepare the aggregate data
# that contain the control variables.

```




```{r polity-data, warning=FALSE, }

# Polity data: keep country code, year and the polity2 score for all
# observations after 1945.
polity4 <- read_xls("./replication_files/data/data/raw_data/p5v2018.xls")

polity <- polity4 %>%
  filter(year > 1945) %>%
  select(ccode, year, polity2)



```


```{r wb-data, }


# Convert one World Bank indicator column to a number rounded to two digits.
# The ".." missing-value marker is turned into NA *before* the numeric
# conversion; a column that was already parsed as numeric is only rounded.
parse_wb_number <- function(x) {
  if (is.character(x)) {
    x <- na_if(x, "..")
  }
  round(as.numeric(x), digits = 2)
}

# World Bank indicators: GDP, GDP growth and GDP per capita by country-year.
wb_data <- read_csv("./replication_files/data/data/raw_data/wb-data.csv") %>%
  select(
    country = `Country Name`,
    year = Time,
    gdp = `GDP (current US$) [NY.GDP.MKTP.CD]`,
    gdp_change = `GDP growth (annual %) [NY.GDP.MKTP.KD.ZG]`,
    gdp_cap = `GDP per capita (current US$) [NY.GDP.PCAP.CD]`
  ) %>%
  mutate(
    across(c(gdp, gdp_change, gdp_cap), parse_wb_number),
    year = as.numeric(year)
  )

# Correlates of War code used as the merge key further below
wb_data$cown <- countrycode(wb_data$country, "country.name", "cown")

```


```{r, fariss-data, }


# Human rights protection scores: keep year, country code and the mean
# latent score, renamed to the names used in the merges below.
fariss_raw <- read_csv("./replication_files/data/data/raw_data/HumanRightsProtectionScores_v4.01.csv")

fariss <- select(
  fariss_raw,
  year = YEAR,
  cown = COW,
  mean_hr = theta_mean
)

```



```{r merging-polity2, }

# Merge polity2 for ctry1 and ctry2, renaming the score for each side.
# The first merge matches on the country code only, so it is also the step
# that adds the `year` column to the dyads; the second merge matches on
# country code and year.
polity_data1 <- country_with_sig %>%
  left_join(
    rename(polity, ctry1_polity2 = polity2),
    by = c("cown1" = "ccode")
  )

polity_data2 <- polity_data1 %>%
  left_join(
    rename(polity, ctry2_polity2 = polity2),
    by = c("cown2" = "ccode", "year" = "year")
  )

```

```{r merging-wb, }

# Merge the World Bank indicators for ctry1 and ctry2, prefixing the
# indicator names with the side of the dyad they belong to. Both merges also
# bring in the `country` name column, which therefore ends up twice
# (`country.x` / `country.y`).
wb_data1 <- polity_data2 %>%
  left_join(
    rename(
      wb_data,
      ctry1_gdp = gdp,
      ctry1_gdp_change = gdp_change,
      ctry1_gdp_cap = gdp_cap
    ),
    by = c("cown1" = "cown", "year" = "year")
  )

wb_data2 <- wb_data1 %>%
  left_join(
    rename(
      wb_data,
      ctry2_gdp = gdp,
      ctry2_gdp_change = gdp_change,
      ctry2_gdp_cap = gdp_cap
    ),
    by = c("cown2" = "cown", "year" = "year")
  )


```


```{r}

# Chinese investment volume per recipient country and year.
invest_data <- read_xlsx("./replication_files/data/data/raw_data/China-Global-Investment-Tracker-2022-SPRING-final-1.xlsx", sheet = 4, skip = 5)

invest_data <- invest_data %>%
  # One row per country-year holding the summed investment volume
  group_by(Country, Year) %>%
  summarise(sum_invest = sum(`Quantity in Millions`), .groups = "drop") %>%
  # The country code has to be derived while `Country` is still available
  mutate(cown = countrycode(Country, "country.name", "cown")) %>%
  # Rows without a Correlates of War code cannot be matched to a valid dyad
  filter(!is.na(cown)) %>%
  # `Country` is kept on purpose: the earlier `select(-Country)` on the
  # grouped data never removed it, so the merged data has always carried
  # this column.
  select(Country, year = Year, sum_invest, cown)

```


```{r merging-fariss, }

# Merge the human rights scores for ctry1, then the investment data (matched
# to ctry1 only), then the human rights scores for ctry2. Finally drop the
# two duplicated World Bank country-name columns.
fariss1 <- wb_data2 %>%
  left_join(
    rename(fariss, ctry1_fariss = mean_hr),
    by = c("cown1" = "cown", "year" = "year")
  )

raw_invest <- fariss1 %>%
  left_join(invest_data, by = c("cown1" = "cown", "year" = "year"))

raw_data <- raw_invest %>%
  left_join(
    rename(fariss, ctry2_fariss = mean_hr),
    by = c("cown2" = "cown", "year" = "year")
  ) %>%
  select(-c(country.x, country.y))

```



```{r distances, }
# Free memory: remove every object except the merged dyad-year data.
rm(list = setdiff(ls(), "raw_data"))

# Restrict to 1990-2018, compute the absolute distance between the two
# countries on every control variable, and put the columns into a fixed
# order (each pair of country values is followed by its distance).
raw_distances <- raw_data %>%
  ungroup() %>%
  filter(year %in% 1990:2018) %>%
  mutate(
    polity2_dist = abs(ctry1_polity2 - ctry2_polity2),
    gdp_dist = abs(ctry1_gdp - ctry2_gdp),
    gdp_change_dist = abs(ctry1_gdp_change - ctry2_gdp_change),
    gdp_cap_dist = abs(ctry1_gdp_cap - ctry2_gdp_cap),
    fariss_dist = abs(ctry1_fariss - ctry2_fariss)
  ) %>%
  relocate(
    dyad_id,
    ctry1, cown1,
    ctry2, cown2,
    year,
    ctry1_mou, ctry2_mou,
    ctry1_neg, ctry2_neg,
    ctry1_bri_start, ctry2_bri_start,
    ctry1_polity2, ctry2_polity2, polity2_dist,
    ctry1_gdp, ctry2_gdp, gdp_dist,
    ctry1_gdp_cap, ctry2_gdp_cap, gdp_cap_dist,
    ctry1_gdp_change, ctry2_gdp_change, gdp_change_dist,
    ctry1_fariss, ctry2_fariss, fariss_dist
  )
```


```{r , }

# UN voting data: ideal point distance per dyad-year, its previous value
# within the dyad, and the change between the two.
agreement <- read_csv("./replication_files/data/data/raw_data/AgreementScoresAll_Mar2021.csv") %>%
  select(ccode1, ccode2, year, IdealPointDistance) %>%
  group_by(ccode1, ccode2) %>%
  mutate(
    # Order by year explicitly so the lag does not depend on how the rows
    # happen to be sorted in the input file.
    IdealPointDistance_lag = dplyr::lag(IdealPointDistance, order_by = year),
    IdealPointDistance_diff = IdealPointDistance - IdealPointDistance_lag
  ) %>%
  ungroup()

# Attach the voting data to the dyad-year distances
full_data <- left_join(
  raw_distances,
  agreement,
  by = c(
    "cown1" = "ccode1",
    "cown2" = "ccode2",
    "year" = "year"
  )
)

write_csv2(full_data, "./replication_files/data/data/final_data/full_data.csv")

```


```{r, }
# Re-read the full dyad-year data from disk and split it into one dataset per
# comparison country (the second country of the dyad).
full_data <- read_csv2('./replication_files/data/data/final_data/full_data.csv')

china_data <- filter(full_data, ctry2 == "China")
write_csv2(china_data, "./replication_files/data/data/final_data/china_data.csv")

usa_data <- filter(full_data, ctry2 == "United States")
write_csv2(usa_data, "./replication_files/data/data/final_data/usa_data.csv")

russia_data <- filter(full_data, ctry2 == "Russian Federation")
write_csv2(russia_data, "./replication_files/data/data/final_data/russia_data.csv")

# The intermediate objects are no longer needed
rm(agreement, full_data, raw_data, raw_distances)
```


```{r}
# Load the three comparison-country datasets used by the codebook below.
china_data <- read_csv2(file = "./replication_files/data/data/final_data/china_data.csv")
usa_data <- read_csv2(file = "./replication_files/data/data/final_data/usa_data.csv")
russia_data <- read_csv2(file = "./replication_files/data/data/final_data/russia_data.csv")
```


\newpage


# Codebook

## dyad_id

Numeric ID of country pairs. For example: Afghanistan - Albania is ID 1.

## ctry1

First country in Dyad ID, country name.

## ctry2 

Second country in Dyad ID, country name.

## cown1 

First country in Dyad ID, correlates of war numeric identifier.

## cown2 

Second country in Dyad ID, correlates of war numeric identifier.

## year 

Numeric year     

## ctry1_mou 

Date of MoU signature for country 1 in Dyad

## ctry2_mou  

Date of MoU signature for country 2 in Dyad

## ctry1_neg 

Date of MoU-negotiation start for country 1 in Dyad

## ctry2_neg 

Date of MoU-negotiation start for country 2 in Dyad

## ctry1_bri_start

Date of construction start for country 1 in Dyad

## ctry2_bri_start 

Date of construction start for country 2 in Dyad

\newpage


## ctry1_polity2 

Country 1 Polity2 Score. "Revised Combined Polity Score: This variable is a modified version of the POLITY variable added in order to facilitate the use of the POLITY regime measure in time-series analyses." (from Polity codebook)



```{r}
# Bar chart of ctry1_polity2 for one comparison dataset. The solid line marks
# the mean, the dashed lines mark one standard deviation below and above it.
bar_ctry1_polity2 <- function(dat, panel_title) {
  avg <- mean(dat$ctry1_polity2, na.rm = TRUE)
  spread <- sd(dat$ctry1_polity2, na.rm = TRUE)

  ggplot(dat, aes(ctry1_polity2)) +
    geom_bar(fill = "gray80", color = "black") +
    geom_vline(xintercept = avg, color = "gray40") +
    geom_vline(xintercept = avg - spread, lty = 2, color = "gray40") +
    geom_vline(xintercept = avg + spread, lty = 2, color = "gray40") +
    labs(x = "Polity2 Score", y = "Count", title = panel_title) +
    theme_minimal()
}

p1 <- bar_ctry1_polity2(china_data, "China")
p2 <- bar_ctry1_polity2(usa_data, "USA")
p3 <- bar_ctry1_polity2(russia_data, "Russia")

# One panel per comparison country
grid.arrange(p1, p2, p3)
``` 

```{r}

# Mean and standard deviation (rounded to two digits) of ctry1_polity2 in one
# comparison dataset. This table belongs to the ctry1_polity2 section and
# follows the ctry1_polity2 figure, so it summarises ctry1_polity2 (it
# previously summarised ctry2_polity2).
describe_ctry1_polity2 <- function(dat, label) {
  tibble(
    Country = label,
    V1 = round(mean(dat$ctry1_polity2, na.rm = TRUE), 2),
    V2 = round(sd(dat$ctry1_polity2, na.rm = TRUE), 2)
  )
}

china <- describe_ctry1_polity2(china_data, "China")
usa <- describe_ctry1_polity2(usa_data, "United States")
russia <- describe_ctry1_polity2(russia_data, "Russia")

# One row per comparison country
table_data <- rbind(china, usa, russia)

knitr::kable(
  table_data,
  col.names = c("Comparison Country", "Mean", "Standard Deviation"),
  booktabs = TRUE
) %>%
  kable_styling(position = "center", latex_options = "HOLD_position")


```



\newpage

## ctry2_polity2 


Country 2 Polity2 Score. "Revised Combined Polity Score: This variable is a modified version of the POLITY variable added in order to facilitate the use of the POLITY regime measure in time-series analyses." (from Polity codebook)

These are the polity scores of China, Russia, and the US respectively.


```{r}
# Bar chart of ctry2_polity2 for one comparison dataset. The solid line marks
# the mean, the dashed lines mark one standard deviation below and above it.
bar_ctry2_polity2 <- function(dat, panel_title) {
  avg <- mean(dat$ctry2_polity2, na.rm = TRUE)
  spread <- sd(dat$ctry2_polity2, na.rm = TRUE)

  ggplot(dat, aes(ctry2_polity2)) +
    geom_bar(fill = "gray80", color = "black") +
    geom_vline(xintercept = avg, color = "gray40") +
    geom_vline(xintercept = avg - spread, lty = 2, color = "gray40") +
    geom_vline(xintercept = avg + spread, lty = 2, color = "gray40") +
    labs(x = "Polity2 Score", y = "Count", title = panel_title) +
    theme_minimal()
}

p1 <- bar_ctry2_polity2(china_data, "China")
p2 <- bar_ctry2_polity2(usa_data, "USA")
p3 <- bar_ctry2_polity2(russia_data, "Russia")

# One panel per comparison country
grid.arrange(p1, p2, p3)
``` 

```{r}

# Mean and standard deviation (rounded to two digits) of ctry1_polity2 in one
# comparison dataset.
# NOTE(review): this table summarises ctry1_polity2 although it sits in the
# ctry2_polity2 section -- confirm whether it is intended here or is a
# leftover of the ctry1_polity2 section.
describe_ctry1_polity2 <- function(dat, label) {
  tibble(
    Country = label,
    V1 = round(mean(dat$ctry1_polity2, na.rm = TRUE), 2),
    V2 = round(sd(dat$ctry1_polity2, na.rm = TRUE), 2)
  )
}

china <- describe_ctry1_polity2(china_data, "China")
usa <- describe_ctry1_polity2(usa_data, "United States")
russia <- describe_ctry1_polity2(russia_data, "Russia")

# One row per comparison country
table_data <- rbind(china, usa, russia)

knitr::kable(
  table_data,
  col.names = c("Comparison Country", "Mean", "Standard Deviation"),
  booktabs = TRUE
) %>%
  kable_styling(position = "center", latex_options = "HOLD_position")


```

```{r}

# Mean and standard deviation (rounded to two digits) of ctry2_polity2 in one
# comparison dataset.
describe_ctry2_polity2 <- function(dat, label) {
  tibble(
    Country = label,
    V1 = round(mean(dat$ctry2_polity2, na.rm = TRUE), 2),
    V2 = round(sd(dat$ctry2_polity2, na.rm = TRUE), 2)
  )
}

china <- describe_ctry2_polity2(china_data, "China")
usa <- describe_ctry2_polity2(usa_data, "United States")
russia <- describe_ctry2_polity2(russia_data, "Russia")

# One row per comparison country
table_data <- rbind(china, usa, russia)

knitr::kable(
  table_data,
  col.names = c("Country", "Mean", "Standard Deviation"),
  booktabs = TRUE
) %>%
  kable_styling(position = "center", latex_options = "HOLD_position")


```

\newpage

## polity2_dist

Absolute distance between the polity2 scores of country 1 and country 2.


```{r}
# Bar chart of polity2_dist for one comparison dataset. The solid line marks
# the mean, the dashed lines mark one standard deviation below and above it.
bar_polity2_dist <- function(dat, panel_title) {
  avg <- mean(dat$polity2_dist, na.rm = TRUE)
  spread <- sd(dat$polity2_dist, na.rm = TRUE)

  ggplot(dat, aes(polity2_dist)) +
    geom_bar(fill = "gray80", color = "black") +
    geom_vline(xintercept = avg, color = "gray40") +
    geom_vline(xintercept = avg - spread, lty = 2, color = "gray40") +
    geom_vline(xintercept = avg + spread, lty = 2, color = "gray40") +
    labs(x = "Polity2 Distance", y = "Count", title = panel_title) +
    theme_minimal()
}

p1 <- bar_polity2_dist(china_data, "China")
p2 <- bar_polity2_dist(usa_data, "USA")
p3 <- bar_polity2_dist(russia_data, "Russia")

# One panel per comparison country
grid.arrange(p1, p2, p3)
``` 

```{r}

# Mean and standard deviation (rounded to two digits) of polity2_dist in one
# comparison dataset.
describe_polity2_dist <- function(dat, label) {
  tibble(
    Country = label,
    V1 = round(mean(dat$polity2_dist, na.rm = TRUE), 2),
    V2 = round(sd(dat$polity2_dist, na.rm = TRUE), 2)
  )
}

china <- describe_polity2_dist(china_data, "China")
usa <- describe_polity2_dist(usa_data, "United States")
russia <- describe_polity2_dist(russia_data, "Russia")

# One row per comparison country
table_data <- rbind(china, usa, russia)

knitr::kable(
  table_data,
  col.names = c("Comparison Country", "Mean", "Standard Deviation"),
  booktabs = TRUE
) %>%
  kable_styling(position = "center", latex_options = "HOLD_position")


```


\newpage

## ctry1_gdp 

Gross Domestic Product in current US Dollars (World Bank indicator NY.GDP.MKTP.CD), taken from the World Bank, for country 1.


```{r}
# Histogram (50 bins) of ctry1_gdp for one comparison dataset. The solid line
# marks the mean, the dashed lines mark one standard deviation below and
# above it.
hist_ctry1_gdp <- function(dat, panel_title) {
  avg <- mean(dat$ctry1_gdp, na.rm = TRUE)
  spread <- sd(dat$ctry1_gdp, na.rm = TRUE)

  ggplot(dat, aes(ctry1_gdp)) +
    geom_histogram(bins = 50, fill = "gray80", color = "black") +
    geom_vline(xintercept = avg, color = "gray40") +
    geom_vline(xintercept = avg - spread, lty = 2, color = "gray40") +
    geom_vline(xintercept = avg + spread, lty = 2, color = "gray40") +
    labs(x = "GDP", y = "Count", title = panel_title) +
    theme_minimal()
}

p1 <- hist_ctry1_gdp(china_data, "China")
p2 <- hist_ctry1_gdp(usa_data, "USA")
p3 <- hist_ctry1_gdp(russia_data, "Russia")

# One panel per comparison country
grid.arrange(p1, p2, p3)
```
```{r}

# Mean and standard deviation (rounded to two digits) of ctry1_gdp in one
# comparison dataset.
describe_ctry1_gdp <- function(dat, label) {
  tibble(
    Country = label,
    V1 = round(mean(dat$ctry1_gdp, na.rm = TRUE), 2),
    V2 = round(sd(dat$ctry1_gdp, na.rm = TRUE), 2)
  )
}

china <- describe_ctry1_gdp(china_data, "China")
usa <- describe_ctry1_gdp(usa_data, "United States")
russia <- describe_ctry1_gdp(russia_data, "Russia")

# One row per comparison country
table_data <- rbind(china, usa, russia)

# Header spelling corrected ("Comparison"), matching the other tables
knitr::kable(
  table_data,
  col.names = c("Comparison Country", "Mean", "Standard Deviation"),
  booktabs = TRUE
) %>%
  kable_styling(position = "center", latex_options = "HOLD_position")


```



\newpage

## ctry2_gdp 

Gross Domestic Product in current US Dollars (World Bank indicator NY.GDP.MKTP.CD), taken from the World Bank, for country 2.


```{r}
# Histogram (20 bins) of ctry2_gdp for one comparison dataset. The solid line
# marks the mean, the dashed lines mark one standard deviation below and
# above it.
hist_ctry2_gdp <- function(dat, panel_title) {
  avg <- mean(dat$ctry2_gdp, na.rm = TRUE)
  spread <- sd(dat$ctry2_gdp, na.rm = TRUE)

  ggplot(dat, aes(ctry2_gdp)) +
    geom_histogram(bins = 20, fill = "gray80", color = "black") +
    geom_vline(xintercept = avg, color = "gray40") +
    geom_vline(xintercept = avg - spread, lty = 2, color = "gray40") +
    geom_vline(xintercept = avg + spread, lty = 2, color = "gray40") +
    labs(x = "GDP", y = "Count", title = panel_title) +
    theme_minimal()
}

p1 <- hist_ctry2_gdp(china_data, "China")
p2 <- hist_ctry2_gdp(usa_data, "USA")
p3 <- hist_ctry2_gdp(russia_data, "Russia")

# One panel per comparison country
grid.arrange(p1, p2, p3)
```
```{r}

# Mean and standard deviation (rounded to two digits) of ctry2_gdp in one
# comparison dataset.
describe_ctry2_gdp <- function(dat, label) {
  tibble(
    Country = label,
    V1 = round(mean(dat$ctry2_gdp, na.rm = TRUE), 2),
    V2 = round(sd(dat$ctry2_gdp, na.rm = TRUE), 2)
  )
}

china <- describe_ctry2_gdp(china_data, "China")
usa <- describe_ctry2_gdp(usa_data, "United States")
russia <- describe_ctry2_gdp(russia_data, "Russia")

# One row per comparison country
table_data <- rbind(china, usa, russia)

# Header spelling corrected ("Comparison"), matching the other tables
knitr::kable(
  table_data,
  col.names = c("Comparison Country", "Mean", "Standard Deviation"),
  booktabs = TRUE
) %>%
  kable_styling(position = "center", latex_options = "HOLD_position")


```



\newpage

## gdp_dist 

Absolute difference between the GDP of country 1 and country 2.


```{r}
# Histogram (50 bins) of gdp_dist for one comparison dataset. The solid line
# marks the mean, the dashed lines mark one standard deviation below and
# above it.
hist_gdp_dist <- function(dat, panel_title) {
  avg <- mean(dat$gdp_dist, na.rm = TRUE)
  spread <- sd(dat$gdp_dist, na.rm = TRUE)

  ggplot(dat, aes(gdp_dist)) +
    geom_histogram(bins = 50, fill = "gray80", color = "black") +
    geom_vline(xintercept = avg, color = "gray40") +
    geom_vline(xintercept = avg - spread, lty = 2, color = "gray40") +
    geom_vline(xintercept = avg + spread, lty = 2, color = "gray40") +
    labs(x = "GDP Distance", y = "Count", title = panel_title) +
    theme_minimal()
}

p1 <- hist_gdp_dist(china_data, "China")
p2 <- hist_gdp_dist(usa_data, "USA")
p3 <- hist_gdp_dist(russia_data, "Russia")

# One panel per comparison country
grid.arrange(p1, p2, p3)
```


```{r}

# Mean and standard deviation (rounded to two digits) of gdp_dist in one
# comparison dataset.
describe_gdp_dist <- function(dat, label) {
  tibble(
    Country = label,
    V1 = round(mean(dat$gdp_dist, na.rm = TRUE), 2),
    V2 = round(sd(dat$gdp_dist, na.rm = TRUE), 2)
  )
}

china <- describe_gdp_dist(china_data, "China")
usa <- describe_gdp_dist(usa_data, "United States")
russia <- describe_gdp_dist(russia_data, "Russia")

# One row per comparison country
table_data <- rbind(china, usa, russia)

knitr::kable(
  table_data,
  col.names = c("Comparison Country", "Mean", "Standard Deviation"),
  booktabs = TRUE
) %>%
  kable_styling(position = "center", latex_options = "HOLD_position")


```


\newpage

## ctry1_gdp_cap 

Gross Domestic Product per Capita in current US Dollars (World Bank indicator NY.GDP.PCAP.CD), taken from the World Bank, for country 1.


```{r}
# Histogram (50 bins) of ctry1_gdp_cap for one comparison dataset. The solid
# line marks the mean, the dashed lines mark one standard deviation below and
# above it.
hist_ctry1_gdp_cap <- function(dat, panel_title) {
  avg <- mean(dat$ctry1_gdp_cap, na.rm = TRUE)
  spread <- sd(dat$ctry1_gdp_cap, na.rm = TRUE)

  ggplot(dat, aes(ctry1_gdp_cap)) +
    geom_histogram(bins = 50, fill = "gray80", color = "black") +
    geom_vline(xintercept = avg, color = "gray40") +
    geom_vline(xintercept = avg - spread, lty = 2, color = "gray40") +
    geom_vline(xintercept = avg + spread, lty = 2, color = "gray40") +
    labs(x = "GDP per Capita", y = "Count", title = panel_title) +
    theme_minimal()
}

p1 <- hist_ctry1_gdp_cap(china_data, "China")
p2 <- hist_ctry1_gdp_cap(usa_data, "USA")
p3 <- hist_ctry1_gdp_cap(russia_data, "Russia")

# One panel per comparison country
grid.arrange(p1, p2, p3)
```


```{r}

# Mean and standard deviation (rounded to two digits) of ctry1_gdp_cap in one
# comparison dataset.
describe_ctry1_gdp_cap <- function(dat, label) {
  tibble(
    Country = label,
    V1 = round(mean(dat$ctry1_gdp_cap, na.rm = TRUE), 2),
    V2 = round(sd(dat$ctry1_gdp_cap, na.rm = TRUE), 2)
  )
}

china <- describe_ctry1_gdp_cap(china_data, "China")
usa <- describe_ctry1_gdp_cap(usa_data, "United States")
russia <- describe_ctry1_gdp_cap(russia_data, "Russia")

# One row per comparison country
table_data <- rbind(china, usa, russia)

knitr::kable(
  table_data,
  col.names = c("Comparison Country", "Mean", "Standard Deviation"),
  booktabs = TRUE
) %>%
  kable_styling(position = "center", latex_options = "HOLD_position")


```



\newpage

## ctry2_gdp_cap

Gross Domestic Product per capita in current US Dollars (World Bank indicator NY.GDP.PCAP.CD), taken from the World Bank, for country 2.


```{r}
# Histogram (50 bins) of ctry2_gdp_cap for one comparison dataset. The solid
# line marks the mean, the dashed lines mark one standard deviation below and
# above it.
hist_ctry2_gdp_cap <- function(dat, panel_title) {
  avg <- mean(dat$ctry2_gdp_cap, na.rm = TRUE)
  spread <- sd(dat$ctry2_gdp_cap, na.rm = TRUE)

  ggplot(dat, aes(ctry2_gdp_cap)) +
    geom_histogram(bins = 50, fill = "gray80", color = "black") +
    geom_vline(xintercept = avg, color = "gray40") +
    geom_vline(xintercept = avg - spread, lty = 2, color = "gray40") +
    geom_vline(xintercept = avg + spread, lty = 2, color = "gray40") +
    labs(x = "GDP per Capita", y = "Count", title = panel_title) +
    theme_minimal()
}

p1 <- hist_ctry2_gdp_cap(china_data, "China")
p2 <- hist_ctry2_gdp_cap(usa_data, "USA")
p3 <- hist_ctry2_gdp_cap(russia_data, "Russia")

# One panel per comparison country
grid.arrange(p1, p2, p3)
```

```{r}

# Mean and standard deviation of ctry2_gdp_cap, rounded to two decimals, for
# each comparison-country dataset. Columns are named explicitly instead of
# relying on the deprecated coercion of an unnamed matrix to a tibble.
china <- tibble(
  Country = 'China',
  Mean = round(mean(china_data$ctry2_gdp_cap, na.rm = TRUE), 2),
  SD = round(sd(china_data$ctry2_gdp_cap, na.rm = TRUE), 2)
)

usa <- tibble(
  Country = 'United States',
  Mean = round(mean(usa_data$ctry2_gdp_cap, na.rm = TRUE), 2),
  SD = round(sd(usa_data$ctry2_gdp_cap, na.rm = TRUE), 2)
)

russia <- tibble(
  Country = 'Russia',
  Mean = round(mean(russia_data$ctry2_gdp_cap, na.rm = TRUE), 2),
  SD = round(sd(russia_data$ctry2_gdp_cap, na.rm = TRUE), 2)
)

# One row per comparison country: China, United States, Russia
table_data <- bind_rows(china, usa, russia)

# Header spelling corrected ("Comparison")
knitr::kable(
  table_data,
  col.names = c('Comparison Country', 'Mean', 'Standard Deviation'),
  booktabs = TRUE
) %>%
  kable_styling(position = "center", latex_options = "HOLD_position")


```




\newpage

## gdp_cap_dist

Absolute difference in GDP per capita between country 1 and country 2.


```{r}
# Histogram of gdp_cap_dist in `df`, with a solid line at the mean and dashed
# lines one standard deviation below and above it.
moment_histogram <- function(df, panel_title) {
  centre <- mean(df$gdp_cap_dist, na.rm = TRUE)
  spread <- sd(df$gdp_cap_dist, na.rm = TRUE)

  ggplot(df, aes(gdp_cap_dist)) +
    geom_histogram(bins = 50, fill = 'gray80', color = 'black') +
    geom_vline(xintercept = centre, color = 'gray40') +
    geom_vline(xintercept = centre - spread, lty = 2, color = 'gray40') +
    geom_vline(xintercept = centre + spread, lty = 2, color = 'gray40') +
    labs(x = 'GDP per Capita Distance', y = 'Count', title = panel_title) +
    theme_minimal()
}

# One panel per comparison-country dataset
p1 <- moment_histogram(china_data, 'China')
p2 <- moment_histogram(usa_data, 'USA')
p3 <- moment_histogram(russia_data, 'Russia')

grid.arrange(p1, p2, p3)
```



```{r}

# Mean and standard deviation of gdp_cap_dist, rounded to two decimals, for
# each comparison-country dataset. Columns are named explicitly instead of
# relying on the deprecated coercion of an unnamed matrix to a tibble.
china <- tibble(
  Country = 'China',
  Mean = round(mean(china_data$gdp_cap_dist, na.rm = TRUE), 2),
  SD = round(sd(china_data$gdp_cap_dist, na.rm = TRUE), 2)
)

usa <- tibble(
  Country = 'United States',
  Mean = round(mean(usa_data$gdp_cap_dist, na.rm = TRUE), 2),
  SD = round(sd(usa_data$gdp_cap_dist, na.rm = TRUE), 2)
)

russia <- tibble(
  Country = 'Russia',
  Mean = round(mean(russia_data$gdp_cap_dist, na.rm = TRUE), 2),
  SD = round(sd(russia_data$gdp_cap_dist, na.rm = TRUE), 2)
)

# One row per comparison country: China, United States, Russia
table_data <- bind_rows(china, usa, russia)

# Header spelling corrected ("Comparison")
knitr::kable(
  table_data,
  col.names = c('Comparison Country', 'Mean', 'Standard Deviation'),
  booktabs = TRUE
) %>%
  kable_styling(position = "center", latex_options = "HOLD_position")


```



\newpage

## ctry1_gdp_change

Percentage change in GDP for country 1.

```{r}
# Histogram of ctry1_gdp_change in `df`, with a solid line at the mean and
# dashed lines one standard deviation below and above it.
moment_histogram <- function(df, panel_title) {
  centre <- mean(df$ctry1_gdp_change, na.rm = TRUE)
  spread <- sd(df$ctry1_gdp_change, na.rm = TRUE)

  ggplot(df, aes(ctry1_gdp_change)) +
    geom_histogram(bins = 50, fill = 'gray80', color = 'black') +
    geom_vline(xintercept = centre, color = 'gray40') +
    geom_vline(xintercept = centre - spread, lty = 2, color = 'gray40') +
    geom_vline(xintercept = centre + spread, lty = 2, color = 'gray40') +
    labs(x = 'GDP Change', y = 'Count', title = panel_title) +
    theme_minimal()
}

# One panel per comparison-country dataset
p1 <- moment_histogram(china_data, 'China')
p2 <- moment_histogram(usa_data, 'USA')
p3 <- moment_histogram(russia_data, 'Russia')

grid.arrange(p1, p2, p3)
```

```{r}

# Mean and standard deviation of ctry1_gdp_change, rounded to two decimals,
# for each comparison-country dataset. Columns are named explicitly instead of
# relying on the deprecated coercion of an unnamed matrix to a tibble.
china <- tibble(
  Country = 'China',
  Mean = round(mean(china_data$ctry1_gdp_change, na.rm = TRUE), 2),
  SD = round(sd(china_data$ctry1_gdp_change, na.rm = TRUE), 2)
)

usa <- tibble(
  Country = 'United States',
  Mean = round(mean(usa_data$ctry1_gdp_change, na.rm = TRUE), 2),
  SD = round(sd(usa_data$ctry1_gdp_change, na.rm = TRUE), 2)
)

russia <- tibble(
  Country = 'Russia',
  Mean = round(mean(russia_data$ctry1_gdp_change, na.rm = TRUE), 2),
  SD = round(sd(russia_data$ctry1_gdp_change, na.rm = TRUE), 2)
)

# One row per comparison country: China, United States, Russia
table_data <- bind_rows(china, usa, russia)

# Header spelling corrected ("Comparison")
knitr::kable(
  table_data,
  col.names = c('Comparison Country', 'Mean', 'Standard Deviation'),
  booktabs = TRUE
) %>%
  kable_styling(position = "center", latex_options = "HOLD_position")


```



\newpage

## ctry2_gdp_change 

Percentage change in GDP for country 2.


```{r}
# Histogram of ctry2_gdp_change in `df`, with a solid line at the mean and
# dashed lines one standard deviation below and above it.
moment_histogram <- function(df, panel_title) {
  centre <- mean(df$ctry2_gdp_change, na.rm = TRUE)
  spread <- sd(df$ctry2_gdp_change, na.rm = TRUE)

  ggplot(df, aes(ctry2_gdp_change)) +
    geom_histogram(bins = 50, fill = 'gray80', color = 'black') +
    geom_vline(xintercept = centre, color = 'gray40') +
    geom_vline(xintercept = centre - spread, lty = 2, color = 'gray40') +
    geom_vline(xintercept = centre + spread, lty = 2, color = 'gray40') +
    labs(x = 'GDP Change', y = 'Count', title = panel_title) +
    theme_minimal()
}

# One panel per comparison-country dataset
p1 <- moment_histogram(china_data, 'China')
p2 <- moment_histogram(usa_data, 'USA')
p3 <- moment_histogram(russia_data, 'Russia')

grid.arrange(p1, p2, p3)
```


```{r}

# Mean and standard deviation of ctry2_gdp_change, rounded to two decimals,
# for each comparison-country dataset. Columns are named explicitly instead of
# relying on the deprecated coercion of an unnamed matrix to a tibble.
china <- tibble(
  Country = 'China',
  Mean = round(mean(china_data$ctry2_gdp_change, na.rm = TRUE), 2),
  SD = round(sd(china_data$ctry2_gdp_change, na.rm = TRUE), 2)
)

usa <- tibble(
  Country = 'United States',
  Mean = round(mean(usa_data$ctry2_gdp_change, na.rm = TRUE), 2),
  SD = round(sd(usa_data$ctry2_gdp_change, na.rm = TRUE), 2)
)

russia <- tibble(
  Country = 'Russia',
  Mean = round(mean(russia_data$ctry2_gdp_change, na.rm = TRUE), 2),
  SD = round(sd(russia_data$ctry2_gdp_change, na.rm = TRUE), 2)
)

# One row per comparison country: China, United States, Russia
table_data <- bind_rows(china, usa, russia)

# Header spelling corrected ("Comparison")
knitr::kable(
  table_data,
  col.names = c('Comparison Country', 'Mean', 'Standard Deviation'),
  booktabs = TRUE
) %>%
  kable_styling(position = "center", latex_options = "HOLD_position")


```


\newpage

## gdp_change_dist

Absolute distance in percentage change in GDP between country 1 and country 2.


```{r}
# Histogram of gdp_change_dist in `df`, with a solid line at the mean and
# dashed lines one standard deviation below and above it.
moment_histogram <- function(df, panel_title) {
  centre <- mean(df$gdp_change_dist, na.rm = TRUE)
  spread <- sd(df$gdp_change_dist, na.rm = TRUE)

  ggplot(df, aes(gdp_change_dist)) +
    geom_histogram(bins = 50, fill = 'gray80', color = 'black') +
    geom_vline(xintercept = centre, color = 'gray40') +
    geom_vline(xintercept = centre - spread, lty = 2, color = 'gray40') +
    geom_vline(xintercept = centre + spread, lty = 2, color = 'gray40') +
    labs(x = 'GDP change difference', y = 'Count', title = panel_title) +
    theme_minimal()
}

# One panel per comparison-country dataset
p1 <- moment_histogram(china_data, 'China')
p2 <- moment_histogram(usa_data, 'USA')
p3 <- moment_histogram(russia_data, 'Russia')

grid.arrange(p1, p2, p3)
```

```{r}

# Mean and standard deviation of gdp_change_dist, rounded to two decimals, for
# each comparison-country dataset. Columns are named explicitly instead of
# relying on the deprecated coercion of an unnamed matrix to a tibble.
china <- tibble(
  Country = 'China',
  Mean = round(mean(china_data$gdp_change_dist, na.rm = TRUE), 2),
  SD = round(sd(china_data$gdp_change_dist, na.rm = TRUE), 2)
)

usa <- tibble(
  Country = 'United States',
  Mean = round(mean(usa_data$gdp_change_dist, na.rm = TRUE), 2),
  SD = round(sd(usa_data$gdp_change_dist, na.rm = TRUE), 2)
)

russia <- tibble(
  Country = 'Russia',
  Mean = round(mean(russia_data$gdp_change_dist, na.rm = TRUE), 2),
  SD = round(sd(russia_data$gdp_change_dist, na.rm = TRUE), 2)
)

# One row per comparison country: China, United States, Russia
table_data <- bind_rows(china, usa, russia)

knitr::kable(
  table_data,
  col.names = c('Comparison Country', 'Mean', 'Standard Deviation'),
  booktabs = TRUE
) %>%
  kable_styling(position = "center", latex_options = "HOLD_position")


```


\newpage

## ctry1_fariss

The posterior mean of the new latent variable for Country 1 described in Fariss, Christopher; Michael Kenwick; Kevin Reuning, 2020, "Latent Human Rights Protection Scores Version 4", https://doi.org/10.7910/DVN/RQ85GK, Harvard Dataverse, V2, UNF:6:QPg88sybNJyuljPYph2OXQ== [fileUNF].

```{r}
# Histogram of ctry1_fariss in `df`, with a solid line at the mean and dashed
# lines one standard deviation below and above it.
moment_histogram <- function(df, panel_title) {
  centre <- mean(df$ctry1_fariss, na.rm = TRUE)
  spread <- sd(df$ctry1_fariss, na.rm = TRUE)

  ggplot(df, aes(ctry1_fariss)) +
    geom_histogram(bins = 50, fill = 'gray80', color = 'black') +
    geom_vline(xintercept = centre, color = 'gray40') +
    geom_vline(xintercept = centre - spread, lty = 2, color = 'gray40') +
    geom_vline(xintercept = centre + spread, lty = 2, color = 'gray40') +
    labs(x = 'Fariss Score', y = 'Count', title = panel_title) +
    theme_minimal()
}

# One panel per comparison-country dataset
p1 <- moment_histogram(china_data, 'China')
p2 <- moment_histogram(usa_data, 'USA')
p3 <- moment_histogram(russia_data, 'Russia')

grid.arrange(p1, p2, p3)
```

```{r}

# Mean and standard deviation of ctry1_fariss, rounded to two decimals, for
# each comparison-country dataset. Columns are named explicitly instead of
# relying on the deprecated coercion of an unnamed matrix to a tibble.
china <- tibble(
  Country = 'China',
  Mean = round(mean(china_data$ctry1_fariss, na.rm = TRUE), 2),
  SD = round(sd(china_data$ctry1_fariss, na.rm = TRUE), 2)
)

usa <- tibble(
  Country = 'United States',
  Mean = round(mean(usa_data$ctry1_fariss, na.rm = TRUE), 2),
  SD = round(sd(usa_data$ctry1_fariss, na.rm = TRUE), 2)
)

russia <- tibble(
  Country = 'Russia',
  Mean = round(mean(russia_data$ctry1_fariss, na.rm = TRUE), 2),
  SD = round(sd(russia_data$ctry1_fariss, na.rm = TRUE), 2)
)

# One row per comparison country: China, United States, Russia
table_data <- bind_rows(china, usa, russia)

# Header spelling corrected ("Comparison")
knitr::kable(
  table_data,
  col.names = c('Comparison Country', 'Mean', 'Standard Deviation'),
  booktabs = TRUE
) %>%
  kable_styling(position = "center", latex_options = "HOLD_position")


```


\newpage

## ctry2_fariss

The posterior mean of the new latent variable for Country 2 described in Fariss, Christopher; Michael Kenwick; Kevin Reuning, 2020, "Latent Human Rights Protection Scores Version 4", https://doi.org/10.7910/DVN/RQ85GK, Harvard Dataverse, V2, UNF:6:QPg88sybNJyuljPYph2OXQ== [fileUNF].


```{r}
# Histogram of ctry2_fariss in `df`, with a solid line at the mean and dashed
# lines one standard deviation below and above it.
moment_histogram <- function(df, panel_title) {
  centre <- mean(df$ctry2_fariss, na.rm = TRUE)
  spread <- sd(df$ctry2_fariss, na.rm = TRUE)

  ggplot(df, aes(ctry2_fariss)) +
    geom_histogram(bins = 50, fill = 'gray80', color = 'black') +
    geom_vline(xintercept = centre, color = 'gray40') +
    geom_vline(xintercept = centre - spread, lty = 2, color = 'gray40') +
    geom_vline(xintercept = centre + spread, lty = 2, color = 'gray40') +
    labs(x = 'Fariss Score', y = 'Count', title = panel_title) +
    theme_minimal()
}

# One panel per comparison-country dataset
p1 <- moment_histogram(china_data, 'China')
p2 <- moment_histogram(usa_data, 'USA')
p3 <- moment_histogram(russia_data, 'Russia')

grid.arrange(p1, p2, p3)
```


```{r}

# Mean and standard deviation of ctry2_fariss, rounded to two decimals, for
# each comparison-country dataset. Columns are named explicitly instead of
# relying on the deprecated coercion of an unnamed matrix to a tibble.
china <- tibble(
  Country = 'China',
  Mean = round(mean(china_data$ctry2_fariss, na.rm = TRUE), 2),
  SD = round(sd(china_data$ctry2_fariss, na.rm = TRUE), 2)
)

usa <- tibble(
  Country = 'United States',
  Mean = round(mean(usa_data$ctry2_fariss, na.rm = TRUE), 2),
  SD = round(sd(usa_data$ctry2_fariss, na.rm = TRUE), 2)
)

russia <- tibble(
  Country = 'Russia',
  Mean = round(mean(russia_data$ctry2_fariss, na.rm = TRUE), 2),
  SD = round(sd(russia_data$ctry2_fariss, na.rm = TRUE), 2)
)

# One row per comparison country: China, United States, Russia
table_data <- bind_rows(china, usa, russia)

# Header spelling corrected ("Comparison")
knitr::kable(
  table_data,
  col.names = c('Comparison Country', 'Mean', 'Standard Deviation'),
  booktabs = TRUE
) %>%
  kable_styling(position = "center", latex_options = "HOLD_position")


```


\newpage

## fariss_dist 

Distance in Fariss HR scores for country 1 and country 2.


```{r}
# Histogram of fariss_dist in `df`, with a solid line at the mean and dashed
# lines one standard deviation below and above it.
moment_histogram <- function(df, panel_title) {
  centre <- mean(df$fariss_dist, na.rm = TRUE)
  spread <- sd(df$fariss_dist, na.rm = TRUE)

  ggplot(df, aes(fariss_dist)) +
    geom_histogram(bins = 50, fill = 'gray80', color = 'black') +
    geom_vline(xintercept = centre, color = 'gray40') +
    geom_vline(xintercept = centre - spread, lty = 2, color = 'gray40') +
    geom_vline(xintercept = centre + spread, lty = 2, color = 'gray40') +
    labs(x = 'Fariss Distance', y = 'Count', title = panel_title) +
    theme_minimal()
}

# One panel per comparison-country dataset
p1 <- moment_histogram(china_data, 'China')
p2 <- moment_histogram(usa_data, 'USA')
p3 <- moment_histogram(russia_data, 'Russia')

grid.arrange(p1, p2, p3)
```


```{r}

# Mean and standard deviation of fariss_dist, rounded to two decimals, for
# each comparison-country dataset. Columns are named explicitly instead of
# relying on the deprecated coercion of an unnamed matrix to a tibble.
china <- tibble(
  Country = 'China',
  Mean = round(mean(china_data$fariss_dist, na.rm = TRUE), 2),
  SD = round(sd(china_data$fariss_dist, na.rm = TRUE), 2)
)

usa <- tibble(
  Country = 'United States',
  Mean = round(mean(usa_data$fariss_dist, na.rm = TRUE), 2),
  SD = round(sd(usa_data$fariss_dist, na.rm = TRUE), 2)
)

russia <- tibble(
  Country = 'Russia',
  Mean = round(mean(russia_data$fariss_dist, na.rm = TRUE), 2),
  SD = round(sd(russia_data$fariss_dist, na.rm = TRUE), 2)
)

# One row per comparison country: China, United States, Russia
table_data <- bind_rows(china, usa, russia)

# Header spelling corrected ("Comparison")
knitr::kable(
  table_data,
  col.names = c('Comparison Country', 'Mean', 'Standard Deviation'),
  booktabs = TRUE
) %>%
  kable_styling(position = "center", latex_options = "HOLD_position")


```



\newpage

## IdealPointDistance

Voting similarity index between ccode1 and ccode2 in a given session – computed using 3 category vote data (1 = “yes” or approval for an issue; 2 = abstain; 3 = “no” or disapproval for an issue). Abstention is counted as half-agreement with a yes or no vote. (From: Erik Voeten, "Data and Analyses of Voting in the UN General Assembly", Routledge Handbook of International Organization, edited by Bob Reinalda (published May 27, 2013). Available at SSRN: http://ssrn.com/abstract=2111149)


```{r}
# Histogram of IdealPointDistance in `df`, with a solid line at the mean and
# dashed lines one standard deviation below and above it.
moment_histogram <- function(df, panel_title) {
  centre <- mean(df$IdealPointDistance, na.rm = TRUE)
  spread <- sd(df$IdealPointDistance, na.rm = TRUE)

  ggplot(df, aes(IdealPointDistance)) +
    geom_histogram(bins = 50, fill = 'gray80', color = 'black') +
    geom_vline(xintercept = centre, color = 'gray40') +
    geom_vline(xintercept = centre - spread, lty = 2, color = 'gray40') +
    geom_vline(xintercept = centre + spread, lty = 2, color = 'gray40') +
    labs(x = 'Idealpoint Distance', y = 'Count', title = panel_title) +
    theme_minimal()
}

# One panel per comparison-country dataset
p1 <- moment_histogram(china_data, 'China')
p2 <- moment_histogram(usa_data, 'USA')
p3 <- moment_histogram(russia_data, 'Russia')

grid.arrange(p1, p2, p3)
```


```{r}

# Mean and standard deviation of IdealPointDistance, rounded to two decimals,
# for each comparison-country dataset. Columns are named explicitly instead of
# relying on the deprecated coercion of an unnamed matrix to a tibble.
china <- tibble(
  Country = 'China',
  Mean = round(mean(china_data$IdealPointDistance, na.rm = TRUE), 2),
  SD = round(sd(china_data$IdealPointDistance, na.rm = TRUE), 2)
)

usa <- tibble(
  Country = 'United States',
  Mean = round(mean(usa_data$IdealPointDistance, na.rm = TRUE), 2),
  SD = round(sd(usa_data$IdealPointDistance, na.rm = TRUE), 2)
)

russia <- tibble(
  Country = 'Russia',
  Mean = round(mean(russia_data$IdealPointDistance, na.rm = TRUE), 2),
  SD = round(sd(russia_data$IdealPointDistance, na.rm = TRUE), 2)
)

# One row per comparison country: China, United States, Russia
table_data <- bind_rows(china, usa, russia)

# Header spelling corrected ("Comparison")
knitr::kable(
  table_data,
  col.names = c('Comparison Country', 'Mean', 'Standard Deviation'),
  booktabs = TRUE
) %>%
  kable_styling(position = "center", latex_options = "HOLD_position")


```



\newpage

# Identifying voting communities



```{r, }
# Reading data
# Remove every object from the workspace so this section starts from scratch
rm(list = ls())

# Load the full dataset; read.csv2() expects semicolon-separated fields and a
# decimal comma
full <- read.csv2('./replication_files/data/data/final_data/full_data.csv')

```



```{r, }
# Preparing data
## Adds treatment information (whether and when a country is treated).
## This is taken from the data script

# Parse the MOU sign date (day.month.year text) into a Date
full$ctry1_mou2 <- as.Date(full$ctry1_mou, format = "%d.%m.%Y")

# Build a year-month marker for each UNGA session.
# The UN session we are interested in starts in September, so MOUs signed
# before September of the respective year are recorded as treated.
full <- full %>%
  filter(!is.na(year)) %>%
  mutate(un_date = paste0(year, "-09"))

# The days are not needed, so both dates are reduced to year-month values
full$un_date <- as.yearmon(full$un_date)
full$ctry1_mou <- as.yearmon(full$ctry1_mou2)

# Treatment year: the first year in which the session date lies after the
# country's MOU date. Countries without such a year do not appear here.
treated_year_data <- full %>%
  mutate(treated_years = ifelse(un_date > ctry1_mou, 1, 0)) %>%
  select(ctry1, year, treated_years) %>%
  filter(treated_years == 1) %>%
  group_by(ctry1) %>%
  filter(year == min(year, na.rm = TRUE)) %>%
  mutate(treated_year = year) %>%
  select(ctry1, treated_years, treated_year) %>%
  distinct() %>%
  ungroup()

# Add the treatment year to the data and create a treatment timeframe.
# Countries with no treatment year are assigned 2013 as the reference year.
full <- full %>%
  left_join(treated_year_data, by = "ctry1") %>%
  mutate(
    treated_year = ifelse(is.na(treated_year), 2013, treated_year),
    treatment_timeframe = year - treated_year
  )

# Get treated countries.
# treated_years is NA for countries without a treatment year, so the per-country
# mean is NA exactly for the untreated ones. `treated` is 1 only for treated
# countries in years strictly after their treatment year.
# The grouping is dropped at the end so later steps do not inherit it.
full <- full %>%
  group_by(ctry1) %>%
  mutate(
    mean = mean(treated_years),
    treated_country = ifelse(is.na(mean), 0, 1),
    treated = treatment_timeframe * treated_country,
    treated = ifelse(treated > 0, 1, 0)
  ) %>%
  ungroup()
```


```{r, }
# Basis for the network data: rows with a missing cown2 are dropped and only
# the columns used by the network step are kept
rows_with_partner <- drop_na(full, cown2)
network_base <- select(
  rows_with_partner,
  ctry1, ctry2, year, treated_country, IdealPointDistance
)

# Store the result on disk so the next section can start from the file
write_csv2(network_base, './replication_files/data/data/final_data/network_base.csv')

# Wipe the workspace
rm(list = ls())
```


### Restart with clean environment here


```{r, }
# Reload the network basis that the previous step wrote to disk
network_base <- read_csv2('./replication_files/data/data/final_data/network_base.csv')

```


```{r, }

# Closest voting partners of every country in the pre-treatment window.
#
# Takes the global `network_base`, keeps the years from `t_year - 10` to
# `t_year` (a global set elsewhere in the document), averages
# IdealPointDistance per country pair, and keeps the pairs whose average lies
# below the 5th percentile of that country's averages.
#
# Arguments:
#   country, year  Accepted so existing calls keep working, but not used: the
#                  result covers every ctry1 found in network_base.
# Returns:
#   A tibble grouped by ctry1 with columns ctry1, ctry2, mean_dist and
#   quantile_10 (the 5th-percentile cut-off, despite its name).
#
# NOTE(review): t_year - 10 through t_year spans eleven years, while the text
# describes a ten-year period; confirm the intended lower bound.

id_function <- function(country, year) {

  network_base %>%
    # The bounds are passed separately on purpose: `:` binds tighter than `-`,
    # so `t_year-10:t_year` is `t_year - (10:t_year)`, not the window above.
    # `.data$year` is the data column, not this function's `year` argument.
    filter(.data$year %in% seq(t_year - 10, t_year)) %>%
    group_by(ctry1, ctry2) %>%
    # Keep the ctry1 grouping after summarising (the default, stated explicitly)
    summarize(mean_dist = mean(IdealPointDistance), .groups = "drop_last") %>%
    drop_na() %>%
    group_by(ctry1) %>%
    # Here we vary the cut-off point
    # Top 5th percentile
    mutate(quantile_10 = quantile(mean_dist, 0.05)) %>%
    filter(mean_dist < quantile_10)

}
```





```{r, }
# Treated countries observed in 2013: one row per distinct country-year pair
c <- network_base %>%
  filter(treated_country == 1, year == 2013) %>%
  distinct(ctry1, year)

# Country names to iterate over, and the year value(s) found in those rows
countries <- c[["ctry1"]]
t_year <- unique(c[["year"]])

```


```{r, , fig.show = 'hide'}

# Call id_function() once per country and collect the results in a list
# named by country
datalist <- lapply(
  setNames(countries, countries),
  function(i) id_function(i)
)

# Stack the per-country results and drop duplicated rows; keep an unfiltered
# copy before applying the distance threshold
data_saved <- bind_rows(datalist) %>% distinct()

# Here we vary the 0.25 threshold
data <- data_saved %>% filter(mean_dist < 0.25)

# Histogram of the remaining average distances
distance_hist <- ggplot(data, aes(mean_dist)) +
  geom_histogram(bins = 50, color = 'black', fill = 'gray80') +
  labs(x = 'Average Distance', y = 'Count') +
  theme_minimal()

distance_hist

# Save the figure to disk
ggsave(
  './replication_files/analysis/analysis/graphs/network/distribution-of-distances.pdf',
  plot = distance_hist
)
```


## Distances between countries

We create a network based on the average distance between UNGA ideal points over a ten-year period prior to the treatment, i.e. the accession to the BRI. The idea is that we need to identify voting blocs in order to accurately identify the sensitive changes in ideal points. To do so, we calculate the average distance and select those countries that fall within the fifth percentile of most proximate countries to the country under consideration.

To give an example, we calculate the average distance of each country to Hungary and select the countries that are within the top 5th percentile.

```{r, }
# Rows of the proximity table in which Hungary is country 1
hungary_pairs <- filter(data, ctry1 == 'Hungary')

# The LaTeX table is stored in `test`; nothing is printed from this chunk
test <- kable_styling(
  knitr::kable(
    hungary_pairs,
    format = "latex",
    booktabs = TRUE,
    caption = "Table 1: Most proximate countries to Hungary",
    col.names = c("Country 1", " Country 2", "Average Distance", "Fifth percentile")
  ),
  font_size = 8
)

```

We repeat this process for each country in the data. The distances are distributed as follows:

```{r, fig.cap ='Distribution of average distances (10 year period 2004-2013)',  out.width='75%', fig.align='center'}
# Embed the distance histogram from the PDF file on disk
knitr::include_graphics('./replication_files/analysis/analysis/graphs/network/distribution-of-distances.pdf')
```


## Networks

Based on the above-calculated distances, we create a network of voting blocs in the UNGA, based on their ideal point distances between 2004 and 2013. 

```{r}
# Build the voting network and detect communities in it.
# graph_from_data_frame() reads the first two columns as the connected
# entities, so the percentile column is dropped and ctry1/ctry2 come first.
data <- data %>% select(-quantile_10) %>% relocate(ctry1, ctry2, everything())

# Create the (undirected) network object
network_full <- graph_from_data_frame(d = data, directed = FALSE)

# Calculate the clusters
# Alternative kept for reference:
#cluster_full <- cluster_optimal(network_full, weights = NULL)
cluster_full <- cluster_walktrap(network_full)

# Specify the number of clusters (disabled)
#cluster_full$membership<- cluster_full %>% cut_at(no = 10)

# Store the cluster information in a data frame.
# We will use this later to specify the clusters across the years.
# The cluster id is numeric before sorting, so clusters are ordered 1, 2, ...,
# 10 rather than as text ("1", "10", "2").
cluster_data <- tibble(
  ctry1 = cluster_full$names,
  cluster = as.numeric(cluster_full$membership)
) %>%
  arrange(cluster)

# Number of ties (degree) of every country in the network
degree <- degree(network_full)

number_of_ties <- enframe(degree)

# Here we vary the minimum number of ties
cluster_data <- left_join(cluster_data, number_of_ties,
                          by = c('ctry1' = 'name')) %>%
  filter(value > 3)

# Write the clusters to disk and read the same file back, so the tables below
# show exactly what was computed here.
# NOTE(review): the original read the file back from
# ./replication_files/data/data/final_data/cluster.csv, a different location
# from the one written; confirm which directory is the canonical one.
cluster_file <- './replication_files/analysis/analysis/graphs/network/final_data/cluster.csv'

write_csv2(cluster_data, cluster_file)

cluster_data <- read_csv2(cluster_file)

```


```{r, message = F, comment = F}

# Plot settings stored on the graph: vertex frame colour and edge arrow mode
V(network_full)$frame.color <- "white"
E(network_full)$arrow.mode <- 0


# Create the network figure; vertices are coloured by their community
pdf('./replication_files/analysis/analysis/graphs/network/full-network.pdf')
plot.igraph(
  network_full,
  vertex.color = membership(cluster_full),
  vertex.size = 7,
  vertex.label.cex = 0.45
)
# dev.off() visibly returns the device that becomes active afterwards; wrap it
# in invisible() so that value is not printed into the knitted document.
invisible(dev.off())

```


We first plot a full network, including all countries. It displays, however, a large cluster in the middle. To further specify the voting blocs, we subset this large cluster, re-create a network (a sub-network), and again use a community detection algorithm to identify clusters.

```{r, fig.cap ='Network of voting clusters (10 year period 2004-2013)',  out.width='100%', }
# Embed the previously saved plot of the full network
knitr::include_graphics(
  path = './replication_files/analysis/analysis/graphs/network/full-network.pdf'
)
```


## List of communities

```{r, }
# Community membership tables. Each call passes two slices of cluster_data as
# a list, which kable() renders as two tables under one caption.
# NOTE(review): the row ranges are hard-coded to rows 1-125 of cluster_data;
# confirm they still match the number of rows after the ties filter.
knitr::kable(
  list(cluster_data[1:31, ], cluster_data[32:61, ]),
  caption = "Community Membership I",
  col.names = c("Country", "Cluster", "Ties"),
  booktabs = TRUE,
  format = "latex"
) %>%
  kable_styling(font_size = 8)

# Same layout for the remaining rows; the output format is set explicitly so
# that both tables are produced the same way.
knitr::kable(
  list(cluster_data[62:93, ], cluster_data[94:125, ]),
  caption = "Community Membership II",
  col.names = c("Country", "Cluster", "Ties"),
  booktabs = TRUE,
  format = "latex"
) %>%
  kable_styling(font_size = 8)


```



```{r}
# Load the China comparison data and attach each country's cluster
# information (matched on ctry1).
china <- './replication_files/data/data/final_data/china_data.csv' %>%
  read_csv2() %>%
  left_join(cluster_data, by = 'ctry1')


```

```{r}
# Code the BRI treatment variables for the China comparison data and write
# the analysis file.

# Create a new sign-date variable that is in a "date" format
china$ctry1_mou2 <- as.Date(china$ctry1_mou, format = "%d.%m.%Y")

# Create a preliminary variable for UNGA session
# The UN Session we are interested in started in September. MOUs signed before
# September of the respective years will therefore be recorded as treated.
china <- china %>%
  filter(!is.na(year)) %>%
  mutate(un_date = paste0(year, "-09"))

# We don't need the days so we create compatible year and month variables
china$un_date <- as.yearmon(china$un_date)
china$ctry1_mou <- as.yearmon(china$ctry1_mou2)

# Now we need to create a variable for the treatment year.
# I need to identify the first year in which the treatment took place:
# keep the country-years whose September lies after the MOU month and, per
# country, only the earliest of them.
treated_year_data <- china %>%
  mutate(treated_years = ifelse(un_date > ctry1_mou, 1, 0)) %>%
  select(ctry1, year, treated_years) %>%
  filter(treated_years == 1) %>%
  group_by(ctry1) %>%
  filter(year == min(year, na.rm = TRUE)) %>%
  mutate(treated_year = year) %>%
  select(ctry1, treated_years, treated_year) %>%
  ungroup()

# Adding the treatment year to the data and creating a treatment timeframe.
# Countries without a treatment year get 2013 as the reference year.
china <- china %>%
  left_join(treated_year_data, by = "ctry1") %>%
  mutate(treated_year = ifelse(is.na(treated_year), 2013, treated_year),
         treatment_timeframe = year - treated_year)

# Get treated countries.
# treated_years is 1 for countries found in treated_year_data and NA
# otherwise, so the group mean is NA exactly for never-treated countries.
# NOTE(review): `treated` is 1 only when treatment_timeframe > 0, i.e. in the
# years strictly after treated_year; the first treated year itself is coded 0.
# Confirm this lag is intended.
china <- china %>%
  group_by(ctry1) %>%
  mutate(mean = mean(treated_years),
         treated_country = ifelse(is.na(mean), 0, 1),
         treated = treatment_timeframe * treated_country,
         treated = ifelse(treated > 0, 1, 0)) %>%
  ungroup()

# Keep the analysis variables for the years after 2005
df <- china %>%
  select(ctry1, IdealPointDistance, year, treatment_timeframe, ctry1_polity2,
         ctry1_gdp_cap, ctry1_fariss, cluster,
         treated_country, treated, gdp_cap_dist, polity2_dist, fariss_dist,
         sum_invest) %>%
  filter(year > 2005)

write_csv2(df, './replication_files/data/data/final_data/china_data_with_cluster.csv')

```



```{r}
# Load the USA comparison data and attach each country's cluster
# information (matched on ctry1).
usa <- './replication_files/data/data/final_data/usa_data.csv' %>%
  read_csv2() %>%
  left_join(cluster_data, by = 'ctry1')


```


```{r}
# Code the BRI treatment variables for the USA comparison data and write
# the analysis file.

# Create a new sign-date variable that is in a "date" format
usa$ctry1_mou2 <- as.Date(usa$ctry1_mou, format = "%d.%m.%Y")

# Create a preliminary variable for UNGA session
# The UN Session we are interested in started in September. MOUs signed before
# September of the respective years will therefore be recorded as treated.
usa <- usa %>%
  filter(!is.na(year)) %>%
  mutate(un_date = paste0(year, "-09"))

# We don't need the days so we create compatible year and month variables
usa$un_date <- as.yearmon(usa$un_date)
usa$ctry1_mou <- as.yearmon(usa$ctry1_mou2)

# Now we need to create a variable for the treatment year.
# I need to identify the first year in which the treatment took place:
# keep the country-years whose September lies after the MOU month and, per
# country, only the earliest of them.
treated_year_data <- usa %>%
  mutate(treated_years = ifelse(un_date > ctry1_mou, 1, 0)) %>%
  select(ctry1, year, treated_years) %>%
  filter(treated_years == 1) %>%
  group_by(ctry1) %>%
  filter(year == min(year, na.rm = TRUE)) %>%
  mutate(treated_year = year) %>%
  select(ctry1, treated_years, treated_year) %>%
  ungroup()

# Adding the treatment year to the data and creating a treatment timeframe.
# Countries without a treatment year get 2013 as the reference year.
usa <- usa %>%
  left_join(treated_year_data, by = "ctry1") %>%
  mutate(treated_year = ifelse(is.na(treated_year), 2013, treated_year),
         treatment_timeframe = year - treated_year)

# Get treated countries.
# treated_years is 1 for countries found in treated_year_data and NA
# otherwise, so the group mean is NA exactly for never-treated countries.
# NOTE(review): `treated` is 1 only when treatment_timeframe > 0, i.e. in the
# years strictly after treated_year; the first treated year itself is coded 0.
# Confirm this lag is intended.
usa <- usa %>%
  group_by(ctry1) %>%
  mutate(mean = mean(treated_years),
         treated_country = ifelse(is.na(mean), 0, 1),
         treated = treatment_timeframe * treated_country,
         treated = ifelse(treated > 0, 1, 0)) %>%
  ungroup()

# Keep the analysis variables for the years after 2005
df <- usa %>%
  select(ctry1, IdealPointDistance, year, treatment_timeframe, ctry1_polity2,
         ctry1_gdp_cap, ctry1_fariss, cluster,
         treated_country, treated, gdp_cap_dist, polity2_dist, fariss_dist,
         sum_invest) %>%
  filter(year > 2005)

write_csv2(df, './replication_files/data/data/final_data/usa_data_with_cluster.csv')


```

```{r}
# Load the Russia comparison data and attach each country's cluster
# information (matched on ctry1).
russia <- './replication_files/data/data/final_data/russia_data.csv' %>%
  read_csv2() %>%
  left_join(cluster_data, by = 'ctry1')


```


```{r}
# Code the BRI treatment variables for the Russia comparison data and write
# the analysis file.

# Create a new sign-date variable that is in a "date" format
russia$ctry1_mou2 <- as.Date(russia$ctry1_mou, format = "%d.%m.%Y")

# Create a preliminary variable for UNGA session
# The UN Session we are interested in started in September. MOUs signed before
# September of the respective years will therefore be recorded as treated.
russia <- russia %>%
  filter(!is.na(year)) %>%
  mutate(un_date = paste0(year, "-09"))

# We don't need the days so we create compatible year and month variables
russia$un_date <- as.yearmon(russia$un_date)
russia$ctry1_mou <- as.yearmon(russia$ctry1_mou2)

# Now we need to create a variable for the treatment year.
# I need to identify the first year in which the treatment took place:
# keep the country-years whose September lies after the MOU month and, per
# country, only the earliest of them.
treated_year_data <- russia %>%
  mutate(treated_years = ifelse(un_date > ctry1_mou, 1, 0)) %>%
  select(ctry1, year, treated_years) %>%
  filter(treated_years == 1) %>%
  group_by(ctry1) %>%
  filter(year == min(year, na.rm = TRUE)) %>%
  mutate(treated_year = year) %>%
  select(ctry1, treated_years, treated_year) %>%
  ungroup()

# Adding the treatment year to the data and creating a treatment timeframe.
# Countries without a treatment year get 2013 as the reference year.
russia <- russia %>%
  left_join(treated_year_data, by = "ctry1") %>%
  mutate(treated_year = ifelse(is.na(treated_year), 2013, treated_year),
         treatment_timeframe = year - treated_year)

# Get treated countries.
# treated_years is 1 for countries found in treated_year_data and NA
# otherwise, so the group mean is NA exactly for never-treated countries.
# NOTE(review): `treated` is 1 only when treatment_timeframe > 0, i.e. in the
# years strictly after treated_year; the first treated year itself is coded 0.
# Confirm this lag is intended.
russia <- russia %>%
  group_by(ctry1) %>%
  mutate(mean = mean(treated_years),
         treated_country = ifelse(is.na(mean), 0, 1),
         treated = treatment_timeframe * treated_country,
         treated = ifelse(treated > 0, 1, 0)) %>%
  ungroup()

# Keep the analysis variables for the years after 2005
df <- russia %>%
  select(ctry1, IdealPointDistance, year, treatment_timeframe, ctry1_polity2,
         ctry1_gdp_cap, ctry1_fariss, cluster,
         treated_country, treated, gdp_cap_dist, polity2_dist, fariss_dist,
         sum_invest) %>%
  filter(year > 2005)

write_csv2(df, './replication_files/data/data/final_data/russia_data_with_cluster.csv')


```


